program define estimators_022609, eclass

*** NOTES: 
*** Program calculates the estimates used in the bootstrap program "bootstrap_022609.do".
*** This program is for the outcome earnq16.
*** Please refer to the documentation for details.
*** Flores, Flores-Lagunes, Gonzalez, and Neumann 12/02/2010 


version 9.2
marksample touse 

set more off
* 0. We save the initial number of observations
quietly count
scalar n_all=r(N)


* 1. ESTIMATE GPS. 
quietly xi: glm T 	r_2 r_3 r_4 uerateb_* uerate1635_1 uerate1635_2 uerate1635_3 uerate1635_4   /// 
moprejc_*  notenroll_* prlsi30_* prlsi90_* prlsi180_* prlsi270_* kv_haschld_* b_mar_*   /// 
base_r_head_* base_age_* b_age2_* b_age3_* base_hs_d_* base_ged_d_* base_voc_d_*    ///
base_any_ed1_* base_hgc_* b_hgc2_* b_english_* Hl_base_hea_2_* Hl_base_hea_3_* Hl_base_hea_4_*   /// 
base_py_cig_* base_py_alchl_* base_py_pot_* base_wkearnr_* base_evworkb_* base_evarrst1_*    ///
b_pmsa_* b_msa_* b_livew2parents_* base_hadworry_* TJ_base_typ_1_* TJ_base_typ_2_*    ///
TJ_base_typ_3_* TJ_base_typ_4_* TJ_base_typ_5_* TJ_base_typ_6_* TJ_base_typ_7_*    ///
TJ_base_typ_8_* TJ_base_typ_9_* TJ_base_typ_10_* TJ_base_typ_11_* base_knewcntr_*    ///
base_e_math_p_* base_e_read_p_* base_e_along_p_* base_e_contrl_p_* base_e_esteem_p_*    ///
base_e_spcjob_* base_e_friend_p_* b_hearpjc_* base_knew_jc_* base_r_crgoal_p_* base_r_train_p_*   /// 
base_r_getged_p_* base_r_nowork_p_* base_r_comm_p_* base_r_home_p_* base_r_other_p_*    ///
kv_nonres_* cent_1_* state_*, vce(robust) f(gamma) l(log) search
predict T_hat    // Default is to calculate mu=E(y)=g(xB)  
gen a=1/e(phi)
gen b=T_hat/a
gen Rti= gammaden(a,b,0,T)   // We call the "own" gps --f(ti|xi)-- Rti



* 2. IMPOSE OVERLAP CONDITION BASED ON 5 GROUPS. 
* NOTE: We generate a new variable "overlap" which equals one if satisfy overlap condition and 0 o/w.  
*** Create 5 groups based on percentiles
quietly egen Q1=pctile(T), p(20)
quietly egen Q2=pctile(T), p(40)
quietly egen Q3=pctile(T), p(60)
quietly egen Q4=pctile(T), p(80)

gen G1=0
gen G2=0 
gen G3=0 
gen G4=0 
gen G5=0 
quietly replace G1=1 if T<Q1
quietly replace G2=1 if T<Q2 & G1~=1
quietly replace G3=1 if T<Q3 & G1~=1 & G2~=1
quietly replace G4=1 if T<Q4 & G1~=1 & G2~=1 & G3~=1
quietly replace G5=1 if T>=Q4

*** Get median value of T for each of the 5 groups, and calculate the GPS, f(t|xi),
*** at each of these values for all units (i.e., 5 gps for each individual).

egen TT1=pctile(T), p(10)    //We call it TT since T2 is used for treatment square
egen TT2=pctile(T), p(30)
egen TT3=pctile(T), p(50)
egen TT4=pctile(T), p(70)
egen TT5=pctile(T), p(90)

gen gps_G1= gammaden(a,b,0,TT1)
gen gps_G2= gammaden(a,b,0,TT2)
gen gps_G3= gammaden(a,b,0,TT3)
gen gps_G4= gammaden(a,b,0,TT4)
gen gps_G5= gammaden(a,b,0,TT5)


*** Impose overlap condition as the intersection of the groups.  

* Generate all minima and maxima used in overlap condition.
local i = 1
while `i' <=5 {
		quietly egen temp=max(gps_G`i') if G`i'==0
		quietly egen mx_G`i'_0=max(temp)
		drop temp
		quietly egen temp=max(gps_G`i') if G`i'==1
		quietly egen mx_G`i'_1=max(temp)
		drop temp
		quietly egen temp=min(gps_G`i') if G`i'==0
		quietly egen mn_G`i'_0=min(temp)
		drop temp
		quietly egen temp=min(gps_G`i') if G`i'==1
		quietly egen mn_G`i'_1=min(temp)
		drop temp 
	local i = `i' + 1
}

* We get the maximum of the minimums and the minimum of the maximum for each of the
* 5 comparisons. These are the values that bind, the others can be ignored.   

forvalues i=1/5 {
	gen max_min_`i'=0   // Gives the maximum of the minima. Only one of the two lines below holds.
	replace max_min_`i'= mn_G`i'_0 if  mn_G`i'_0>= mn_G`i'_1   // Set equal to avoid ties. If equal does not matter.
	replace max_min_`i'= mn_G`i'_1 if  mn_G`i'_1> mn_G`i'_0
	
	gen min_max_`i'=0   // Gives the minimum of the maxima. Only one of the two lines below holds.
	replace min_max_`i'= mx_G`i'_0 if  mx_G`i'_0< =mx_G`i'_1   // Set equal to avoid ties. If equal does not matter.
	replace min_max_`i'= mx_G`i'_1 if  mx_G`i'_1< mx_G`i'_0
}

* Impose overlap conditions. Units must be comparale in all 5 groups.
gen overlap=0
replace overlap=1 if ( (gps_G1>max_min_1) & (gps_G1<min_max_1) & (gps_G2>max_min_2) & (gps_G2<min_max_2)    ///
& (gps_G3>max_min_3) & (gps_G3<min_max_3) & (gps_G4>max_min_4) & (gps_G4<min_max_4) & (gps_G5>max_min_5)    ///
& (gps_G5<min_max_5)  ) 

* Drop variables we no longer need
drop mx_* mn_* min_max* max_min* Q* G* TT1 TT2 TT3 TT4 TT5 gps_G* T_hat




* 3. GET THE ESTIMATED GPS AT EACH OF THE POINTS WHERE THE DRF WILL BE ESTIMATED.
* GPS is estimated at the "original" 99 percentiles of the T distribution
  
forvalues i = 1/99 {
	quietly gen Rt_`i' = gammaden(a,b,0,Tper`i') if overlap==1  
}

* Estimated GPS at each of the 99 percentiles plus one points.
 
forvalues i = 1/99 {
	quietly gen Rt_`i'_plus1 = gammaden(a,b,0,Tper`i'_plus1) if overlap==1  
}


***** ----------------------------------------------------------------------------------------
***** ----------------------------------------------------------------------------------------
***** ----- CODE BELOW COMPUTES THE ESTIMATORS OF THE DRF AND ITS DERIVATIVE -----------------


*************************** ----- OUTCOME IN LEVELS ----- ************************************


* 4. OLS OUTCOME ON TREATMENT AND COVARIATES AFTER IMPOSING OVERLAP.
* We use same covariates/specification as in GLM.

xi: regress y T T2 T3 r_2 r_3 r_4 uerateb_* uerate1635_1 uerate1635_2 uerate1635_3 uerate1635_4   /// 
moprejc_*  notenroll_* prlsi30_* prlsi90_* prlsi180_* prlsi270_* kv_haschld_* b_mar_*   /// 
base_r_head_* base_age_* b_age2_* b_age3_* base_hs_d_* base_ged_d_* base_voc_d_*    ///
base_any_ed1_* base_hgc_* b_hgc2_* b_english_* Hl_base_hea_2_* Hl_base_hea_3_* Hl_base_hea_4_*   /// 
base_py_cig_* base_py_alchl_* base_py_pot_* base_wkearnr_* base_evworkb_* base_evarrst1_*    ///
b_pmsa_* b_msa_* b_livew2parents_* base_hadworry_* TJ_base_typ_1_* TJ_base_typ_2_*    ///
TJ_base_typ_3_* TJ_base_typ_4_* TJ_base_typ_5_* TJ_base_typ_6_* TJ_base_typ_7_*    ///
TJ_base_typ_8_* TJ_base_typ_9_* TJ_base_typ_10_* TJ_base_typ_11_* base_knewcntr_*    ///
base_e_math_p_* base_e_read_p_* base_e_along_p_* base_e_contrl_p_* base_e_esteem_p_*    ///
base_e_spcjob_* base_e_friend_p_* b_hearpjc_* base_knew_jc_* base_r_crgoal_p_* base_r_train_p_*   /// 
base_r_getged_p_* base_r_nowork_p_* base_r_comm_p_* base_r_home_p_* base_r_other_p_*    ///
kv_nonres_* cent_1_* state_* if overlap==1

predict yhat if overlap==1
* Save scalars we use to calculate DRF. They will be used also below to get derivative.
scalar bT=_b[T]
scalar bT2=_b[T2]
scalar bT3=_b[T3]
matrix DRFolsX=J(1,99,0)   // Matrix where to save DRF for model adjustng using ols
forvalues i=1/99 {
	* Get yhat for each unit at the corresponding Tper
	quietly gen yhatTper=yhat- bT*T - bT2*T2 - bT3*T3 + bT*Tper`i' + (bT2*Tper`i'_2) ///
	 + (bT3*Tper`i'_3)
	quietly sum yhatTper if overlap==1, meanonly
	matrix DRFolsX[1,`i']=r(mean)
	drop yhatTper
} 

* We keep yhat because we use it below to get derivative
 


* 5. GET ESTIMATE OF THE DRF USING HIRANO AND IMBENS PARAMETRIC APPROACH.
*** a). Get coefficients of E[yi|ti,Ri]. Own GPS is called "Rti" 
gen Rti2=Rti^2
gen Rti3=Rti^3
gen TRti=T*Rti   // Interaction term
regress y T T2 T3 Rti Rti2 Rti3 TRti if overlap==1, robust
matrix b=e(b)              // Save coefficients. Used below to get DRF and DERIVATIVE of DRf below.
drop Rti2 Rti3 TRti  
 
*** b). Cumpute DRF=E[Y(t)] for various levels of t: At each of the 99 percentiles.
matrix DRFp=J(1,99,0)   // Matrix where to save DRF for parametric partial mean

forvalues i = 1/99 {
* Create temporal variables where to evaluate regression 
quietly gen Rt2=Rt_`i'^2    
quietly gen Rt3=Rt_`i'^3
quietly gen TperRt=Tper`i'*Rt_`i'
* Get vector of means
quietly tabstat Tper`i' Tper`i'_2 Tper`i'_3 Rt_`i' Rt2 Rt3 TperRt one if overlap==1, save 
matrix xo=r(StatTotal)    // Save vector of means  
* Get DRF at t
matrix DRFp[1,`i']=xo*b'       // Get DRF at t
drop Rt2 Rt3 TperRt
}

matrix drop xo



* 6. GET ESTIMATE OF THE DRF USING SEMIPARAMETRIC WEIGHTING APPROACH.
*** a). Get bandwidth. We use Fan and Gijbels rule of thumb. 

reg y T T2 T3 T4 if overlap==1
sca sigma2=e(rss)/e(df_r)
* Get estimate of sum of squares of second derivatives
gen d2m2= ( (2*_b[T2]) + (6*_b[T3]*T) + (12*_b[T4]*(T^2)) )^2 if overlap==1
sum d2m2
scalar sumd2m2=r(sum) 
* Get range of T
sum T if overlap==1
scalar range=r(max)-r(min)
* Get bandwidth. we write it as scalar.  
scalar bw= ( (sigma2*range) / (2*sqrt(_pi)*sumd2m2) )^(0.2) 

disp("Bandwidth used in weighting")
sca list bw

drop d2m2
scalar drop sigma2 sumd2m2 range 

*** b). Get DRF using inverse by the GPS based on local linear regression.
** Weighting estimator is equivalent to a local linear regression using a "new kernel" which 
** equals the original one now divided by the gps at t: f(t|xi). 

matrix DRFw=J(1,99,0)   // Matrix where to save DRF for weighting
forvalues i = 1/99 {
quietly gen weights= exp( -0.5 * (((Tper`i'-T)/bw))^2 ) / Rt_`i'  //Generate weights. Normal kernel.
quietly gen Ttemp=T-Tper`i' if overlap==1
quietly reg y Ttemp [pweight=weights] if overlap==1  // Run local linear regression
matrix DRFw[1,`i']=_b[_cons]
drop weights Ttemp
}



* 7. GET ESTIMATE OF DERIVATIVE FOR OLS CONTROLLING FOR COVARIATES AFTER IMPOSING OVERLAP.
* We obtain the "derivative" by discrete difference. 

matrix DRFolsX_plus1=J(1,99,0)   // Matrix where to save DRF for t+1
forvalues i=1/99 {
	* Get yhat for each unit at the corresponding Tper + 1
	quietly gen yhatTperp1=yhat- bT*T - bT2*T2 - bT3*T3 + bT*Tper`i'_plus1   ///
	 + (bT2*Tper`i'_plus1_2) + (bT3*Tper`i'_plus1_3)
	quietly sum yhatTperp1 if overlap==1, meanonly
	matrix DRFolsX_plus1[1,`i']=r(mean)
	drop yhatTperp1
} 

** Get "Derivative" by difference.
matrix dDRFolsX=DRFolsX_plus1 - DRFolsX    //vector of "derivatives" of DRF 

matrix drop DRFolsX_plus1
scalar drop bT bT2 bT3

drop yhat
 


* 8. GET ESTIMATE OF THE DERIVATIVE OF THE DRF USING HIRANO AND IMBENS PARAMETRIC APPROACH.
** We use the approach in Bia and Mattei (Stata Journal, 2008, pages 364 & 271). It equals the
** DRF at t+1 minus the DRF at t. 

*** a). Get DRF at t+1. Regression coefficients from first stage come from 6.a. above.

matrix DRFp_plus1=J(1,99,0)   // Matrix where to save DRF for t+1

forvalues i = 1/99 {
* Create temporal variables where to evaluate regression 
quietly gen Rtp12=Rt_`i'_plus1^2
quietly gen Rtp13=Rt_`i'_plus1^3
quietly gen TperRtp1=Tper`i'_plus1*Rt_`i'_plus1
* Get vector of means
quietly tabstat Tper`i'_plus1 Tper`i'_plus1_2 Tper`i'_plus1_3 Rt_`i'_plus1 Rtp12 Rtp13 TperRtp1 one if overlap==1, save 
matrix xo=r(StatTotal)    // Save vector of means  
* Get DRF at t
matrix DRFp_plus1[1,`i']=xo*b'       // Get DRF at t
drop Rtp12 Rtp13 TperRtp1 
}

matrix drop xo

*** b). Get "Derivative" by difference.
matrix dDRFp=DRFp_plus1-DRFp    //vector of derivatives of DRFp 

matrix drop DRFp_plus1



* 9. GET ESTIMATE OF DERIVATIVE OF DRF USING SEMIPARAMETRIC WEIGHTING APPROACH.
*** a). Get bandwidth. We use Fan and Gijbels rule of thumb. 

reg y T T2 T3 T4 T5 if overlap==1
sca sigma2=e(rss)/e(df_r)
* Get estimate of sum of squares of third derivatives
gen d3m2= ( (6*_b[T3]) + (24*_b[T4]*T) + (60*_b[T5]*(T^2)) )^2 if overlap==1
sum d3m2
scalar sumd3m2=r(sum) 
* Get range of T
sum T if overlap==1
scalar range=r(max)-r(min)
* Get bandwidth. we write it as scalar.
scalar dbw= ( (3*sigma2*range) / (8*sqrt(_pi)*sumd3m2) )^(1/7) 

disp("Bandwidth used in weighting")
sca list dbw

drop d3m2
scalar drop sigma2 sumd3m2 range 


*** b). Get Derivative of DRF using inverse by the GPS based on local linear regression.
** We use a local polynomial regression of ORDER TWO (p=2; so that p-v=1 is odd) with 
** a GAUSSIAN kernel. 

matrix dDRFw=J(1,99,0)   // Matrix where to save derivative of DRF using weighting
forvalues i = 1/99 {
quietly gen weights= exp( -0.5 * (((Tper`i'-T)/dbw))^2 ) / Rt_`i'  //Generate weights. Normal kernel.
quietly gen Ttemp=T-Tper`i' if overlap==1
quietly gen Ttemp2=(T-Tper`i')^2 if overlap==1
quietly reg y Ttemp Ttemp2 [pweight=weights] if overlap==1  // Run local linear regression
matrix dDRFw[1,`i']=_b[Ttemp]
drop weights Ttemp Ttemp2
}



***** --------------------------------------------------------------------------------------
*************************** ----- OUTCOME IN DIFFERENCES ----- *****************************


* NOTE: The following code mostly repeats the computations above for the levels

* 10. OLS OUTCOME ON TREATMENT AND COVARIATES AFTER IMPOSING OVERLAP. OUTCOME IN DIFFERENCES.

xi: regress dy T T2 T3 r_2 r_3 r_4 uerateb_* uerate1635_1 uerate1635_2 uerate1635_3 uerate1635_4   /// 
moprejc_*  notenroll_* prlsi30_* prlsi90_* prlsi180_* prlsi270_* kv_haschld_* b_mar_*   /// 
base_r_head_* base_age_* b_age2_* b_age3_* base_hs_d_* base_ged_d_* base_voc_d_*    ///
base_any_ed1_* base_hgc_* b_hgc2_* b_english_* Hl_base_hea_2_* Hl_base_hea_3_* Hl_base_hea_4_*   /// 
base_py_cig_* base_py_alchl_* base_py_pot_* base_wkearnr_* base_evworkb_* base_evarrst1_*    ///
b_pmsa_* b_msa_* b_livew2parents_* base_hadworry_* TJ_base_typ_1_* TJ_base_typ_2_*    ///
TJ_base_typ_3_* TJ_base_typ_4_* TJ_base_typ_5_* TJ_base_typ_6_* TJ_base_typ_7_*    ///
TJ_base_typ_8_* TJ_base_typ_9_* TJ_base_typ_10_* TJ_base_typ_11_* base_knewcntr_*    ///
base_e_math_p_* base_e_read_p_* base_e_along_p_* base_e_contrl_p_* base_e_esteem_p_*    ///
base_e_spcjob_* base_e_friend_p_* b_hearpjc_* base_knew_jc_* base_r_crgoal_p_* base_r_train_p_*   /// 
base_r_getged_p_* base_r_nowork_p_* base_r_comm_p_* base_r_home_p_* base_r_other_p_*    ///
kv_nonres_* cent_1_* state_* if overlap==1

predict yhatd if overlap==1
* Save scalars we use to calculate DRF. They will be used also below to get derivative.
scalar bT=_b[T]
scalar bT2=_b[T2]
scalar bT3=_b[T3]
matrix DRFolsX_d=J(1,99,0)   // Matrix where to save DRF for model adjustng using ols
forvalues i=1/99 {
	* Get yhat for each unit at the corresponding Tper
	quietly gen yhatTper=yhatd- bT*T - bT2*T2 - bT3*T3 + bT*Tper`i' + (bT2*Tper`i'_2) ///
	 + (bT3*Tper`i'_3)
	quietly sum yhatTper if overlap==1, meanonly
	matrix DRFolsX_d[1,`i']=r(mean)
	drop yhatTper
} 

* We keep yhatd because we use it below to get derivative



* 11. GET ESTIMATE OF THE DRF USING HIRANO AND IMBENS PARAMETRIC APPROACH. OUTCOME IN DIFFERENCES.
*** a). Get coefficients of E[yi|ti,Ri]. Own GPS is called "Rti" 
gen Rti2=Rti^2
gen Rti3=Rti^3
gen TRti=T*Rti   // Interaction term
regress dy T T2 T3 Rti Rti2 Rti3 TRti if overlap==1, robust
matrix bd=e(b)              // Save coefficients.
drop Rti2 Rti3 TRti  
 
*** b). Cumpute DRF=E[Y(t)] for various levels of t: At each of the 99 percentiles.
matrix DRFp_d=J(1,99,0)   // Matrix where to save DRF for parametric partial mean

forvalues i = 1/99 {
quietly gen Rt2=Rt_`i'^2    
quietly gen Rt3=Rt_`i'^3
quietly gen TperRt=Tper`i'*Rt_`i'
* Get vector of means
quietly tabstat Tper`i' Tper`i'_2 Tper`i'_3 Rt_`i' Rt2 Rt3 TperRt one if overlap==1, save 
matrix xo=r(StatTotal)    // Save vector of means  
* Get DRF at t
matrix DRFp_d[1,`i']=xo*bd'       // Get DRF at t
drop Rt2 Rt3 TperRt
}


matrix drop xo



*12. GET ESTIMATE OF THE DRF USING SEMIPARAMETRIC WEIGHTING APPROACH. OUTCOME IN DIFFERENCES.
*** a). Get bandwidth. We use Fan and Gijbels rule of thumb. Remember: bandwidth is outcome dependent.

reg dy T T2 T3 T4 if overlap==1
sca sigma2=e(rss)/e(df_r)
* Get estimate of sum of squares of second derivatives
gen d2m2= ( (2*_b[T2]) + (6*_b[T3]*T) + (12*_b[T4]*(T^2)) )^2 if overlap==1
sum d2m2
scalar sumd2m2=r(sum) 
* Get range of T
sum T if overlap==1
scalar range=r(max)-r(min)
* Get bandwidth. we write it as scalar.  
scalar bwd= ( (sigma2*range) / (2*sqrt(_pi)*sumd2m2) )^(0.2) 

disp("Bandwidth used in weighting")
sca list bwd

drop d2m2
scalar drop sigma2 sumd2m2 range 


*** b). Get DRF using inverse by the GPS based on local linear regression.
** Weighting estimator is equivalent to a local linear regression using a "new kernel" which 
** equals the original one now divided by the gps at t: f(t|xi).
matrix DRFw_d=J(1,99,0)   // Matrix where to save DRF for weighting
forvalues i = 1/99 {
quietly gen weights= exp( -0.5 * (((Tper`i'-T)/bwd))^2 ) / Rt_`i'  //Generate weights. Normal kernel.
quietly gen Ttemp=T-Tper`i' if overlap==1
quietly reg dy Ttemp [pweight=weights] if overlap==1  // Run local linear regression
matrix DRFw_d[1,`i']=_b[_cons]
drop weights Ttemp
}




* 13. GET ESTIMATE OF DERIVATIVE FOR OLS CONTROLLING FOR COVARIATES AFTER IMPOSING OVERLAP.
* OUTCOME IN DIFFERENCES.

matrix DRFolsX_plus1=J(1,99,0)   // Matrix where to save DRF for t+1
forvalues i=1/99 {
	* Get yhat for each unit at the corresponding Tper + 1
	quietly gen yhatTperp1=yhatd- bT*T - bT2*T2 - bT3*T3 + bT*Tper`i'_plus1   ///
	 + (bT2*Tper`i'_plus1_2) + (bT3*Tper`i'_plus1_3)
	quietly sum yhatTperp1 if overlap==1, meanonly
	matrix DRFolsX_plus1[1,`i']=r(mean)
	drop yhatTperp1
} 

** Get "Derivative" by difference.
matrix dDRFolsX_d=DRFolsX_plus1 - DRFolsX_d    //vector of "derivatives" of DRF 

matrix drop DRFolsX_plus1
scalar drop bT bT2 bT3

drop yhatd 



* 14. GET ESTIMATE OF THE DERIVATIVE OF THE DRF USING HIRANO AND IMBENS PARAMETRIC APPROACH.
* OUTCOME IN DIFFERENCES.
** We use the approach in Bia and Mattei (Stata Journal, 2008, pages 364 & 271). It equals the
** DRF at t+1 minus the DRF at t. 

*** a). Get DRF at t+1. 

matrix DRFp_plus1=J(1,99,0)   // Matrix where to save DRF for t+1

forvalues i = 1/99 {
* Create temporal variables where to evaluate regression 
quietly gen Rtp12=Rt_`i'_plus1^2
quietly gen Rtp13=Rt_`i'_plus1^3
quietly gen TperRtp1=Tper`i'_plus1*Rt_`i'_plus1
* Get vector of means
quietly tabstat Tper`i'_plus1 Tper`i'_plus1_2 Tper`i'_plus1_3 Rt_`i'_plus1 Rtp12 Rtp13 TperRtp1 one if overlap==1, save 
matrix xo=r(StatTotal)    // Save vector of means  
* Get DRF at t
matrix DRFp_plus1[1,`i']=xo*bd'       // Get DRF at t
drop Rtp12 Rtp13 TperRtp1 
}

matrix drop xo

*** b). Get "Derivative" by difference.
matrix dDRFp_d=DRFp_plus1-DRFp_d    //vector of derivatives of DRFp 

matrix drop DRFp_plus1



* 15. GET ESTIMATE OF DERIVATIVE OF DRF USING SEMIPARAMETRIC WEIGHTING APPROACH. OUTCOME IN DIFFERENCES.
*** a). Get bandwidth. We use Fan and Gijbels rule of thumb.

reg dy T T2 T3 T4 T5 if overlap==1
sca sigma2=e(rss)/e(df_r)
* Get estimate of sum of squares of third derivatives
gen d3m2= ( (6*_b[T3]) + (24*_b[T4]*T) + (60*_b[T5]*(T^2)) )^2 if overlap==1
sum d3m2
scalar sumd3m2=r(sum) 
* Get range of T
sum T if overlap==1
scalar range=r(max)-r(min)
* Get bandwidth. we write it as scalar.
scalar dbwd= ( (3*sigma2*range) / (8*sqrt(_pi)*sumd3m2) )^(1/7) 

disp("Bandwidth used in weighting")
sca list dbwd

drop d3m2
scalar drop sigma2 sumd3m2 range 


*** b). Get Derivative of DRF using inverse by the GPS based on local linear regression.
** We use a local polynomial regression of ORDER TWO (p=2; so that p-v=1 is odd) with 
** a GAUSSIAN kernel.

matrix dDRFw_d=J(1,99,0)   // Matrix where to save derivative of DRF using weighting
forvalues i = 1/99 {
quietly gen weights= exp( -0.5 * (((Tper`i'-T)/dbwd))^2 ) / Rt_`i'  //Generate weights. Normal kernel.
quietly gen Ttemp=T-Tper`i' if overlap==1
quietly gen Ttemp2=(T-Tper`i')^2 if overlap==1
quietly reg dy Ttemp Ttemp2 [pweight=weights] if overlap==1  // Run local linear regression
matrix dDRFw_d[1,`i']=_b[Ttemp]
drop weights Ttemp Ttemp2
}



***** END OF ESTIMATORS
***** ----------------------------------------------------------------------------------------
***** ----------------------------------------------------------------------------------------


* 14. For reference, we save the sample size after overlap.
quietly count if overlap==1
scalar n_ovlp=r(N)



* 15.- FINALLY, WE COLLECT ALL RESULTS FOR THE BOOTSTRAP. POSTESTIMATION RESULTS.
* The bootstrap command needs a 1 by k vector of "estimated coefficients"
* Naming the vectors
matrix coleq DRFolsX = DRFolsX
matrix coleq DRFp = DRFp
matrix coleq DRFw = DRFw
matrix coleq dDRFolsX = dDRFolsX
matrix coleq dDRFp = dDRFp
matrix coleq dDRFw = dDRFw
matrix coleq DRFolsX_d = DRFolsX_d
matrix coleq DRFp_d = DRFp_d
matrix coleq DRFw_d = DRFw_d
matrix coleq dDRFolsX_d = dDRFolsX_d
matrix coleq dDRFp_d = dDRFp_d
matrix coleq dDRFw_d = dDRFw_d
* Matrix of sample sizes
matrix samsize=(n_all, n_ovlp)
matrix coleq samsize= sample
matrix colnames samsize= all a_ovlp
* Matrix of bandwidths
matrix bandw=(bw,dbw,bwd,dbwd)
matrix coleq bandw= bandw
matrix colnames bandw= DRFw dDRFw DRFw_d dDRFw_d
* Full matrix of stuff to bootstrap
matrix b_all=(DRFolsX,DRFp,DRFw,dDRFolsX,dDRFp,dDRFw,DRFolsX_d,DRFp_d,DRFw_d,dDRFolsX_d,dDRFp_d,dDRFw_d,samsize,bandw)


*DROP VARIABLES CREATED BEFORE EXITING ado file
drop  a b Rti overlap Rt_*

* Return postestimation output
ereturn clear
ereturn post b_all, esample(`touse') properties("b")
ereturn local cmd "estimators_022609"
end
